In [6]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
from IPython.display import Image, HTML
import random
import math
import pysolar
import datetime as dt
import pytz
import matplotlib.dates as mpd
import scipy
import scipy.signal as signal
import sklearn
import numexpr as ne
from datacharm import *
from datacharm.dataplots import tableau20, ch_color
from enerpi.base import timeit
from enerpi.api import enerpi_data_catalog
from enerpi.enerplot import plot_tile_last_24h, plot_power_consumption_hourly
from enerpi.sunposition import sun_position
LAT = 38.631463
LONG = -0.866402
ELEV_M = 500
TZ = pytz.timezone('Europe/Madrid')
FS = (16, 10)
sns.set_style('ticks')
pd.set_option('display.width', 120)
def _tslim(ax, h0=12, hf=22, m0=0, mf=0):
xlim = [mpd.num2date(x, tz=TZ) for x in ax.get_xlim()]
new = [mpd.date2num(d.replace(hour=h, minute=m)) for d, h, m in zip(xlim, [h0, hf], [m0, mf])]
ax.set_xlim(new)
return ax
# Catalog and load all the stored data.
cat = enerpi_data_catalog()
data, data_s = cat.get_all_data()
print_info(data_s.tail())
LDR = pd.DataFrame(data.ldr).tz_localize(TZ)
print_cyan(LDR.describe().T.astype(int))
LDR.hist(bins=int((LDR.max() - LDR.min() + 1).values[0] // 5), log=True, figsize=(18, 6), lw=0, color=tableau20[2])
plt.show()
POWER = pd.DataFrame(data.power).tz_localize(TZ)
print_cyan(POWER.describe().T.astype(int))
POWER.hist(bins=int((POWER.max() - POWER.min() + 1).values[0] // 5), log=True, figsize=(18, 6), lw=0, color=tableau20[4])
Out[6]:
In [1126]:
sns.kdeplot(POWER.power.values, shade=True, gridsize=6000)
Out[1126]:
In [5]:
sns.palplot(tableau20)
In [34]:
homog_power = POWER.resample('1s').mean().fillna(method='bfill', limit=3).fillna(-1) #.round().astype('int16')
homog_power.info()
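In [ ]:
# A hedged sanity check (not in the original analysis), assuming homog_power as built above:
# count how many seconds could not be back-filled and were left at the -1 placeholder.
n_missing = (homog_power.power < 0).sum()
print('{} s without data ({:.3%} of the series)'.format(n_missing, n_missing / len(homog_power)))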
In [1268]:
POWER.power.round().value_counts()
Out[1268]:
In [41]:
power_day13 = homog_power.loc['2016-08-13']
ax = power_day13.plot(lw=1, alpha=.8)
power_day14 = homog_power.loc['2016-08-14']
power_day14.plot(ax=ax, lw=1, alpha=.8)
Out[41]:
In [30]:
power_day13.resample('1s').mean().fillna(method='bfill', limit=3).fillna(-1).round().astype(int)
Out[30]:
In [42]:
power_standby = homog_power.loc['2016-08-29']
power_standby.plot(lw=1, alpha=.8)
Out[42]:
In [103]:
mf5 = pd.Series(signal.medfilt(power_standby.power, kernel_size=5), index=power_standby.index, name='mf_5')
mf11 = pd.Series(signal.wiener(power_standby.power, 15), index=power_standby.index, name='wiener_15')
t0, tf = '3:10', '3:20'
ax = power_standby.between_time(t0, tf).plot(lw=1)
mf5.between_time(t0, tf).plot(ax=ax)
mf11.between_time(t0, tf).plot(ax=ax)
plt.show()
t0, tf = '3:20', '3:25'
ax = power_standby.between_time(t0, tf).plot(lw=1)
mf11.between_time(t0, tf).plot(ax=ax)
mf5.between_time(t0, tf).plot(ax=ax)
Out[103]:
In [100]:
power_standby.hist(bins=200, lw=0, log=True, alpha=.5)
mf11.hist(bins=200, lw=0, log=True, alpha=.5)
Out[100]:
In [104]:
N = len(power_standby)
out_fft = np.fft.rfft((power_standby.power - power_standby.power.mean()).values)
out_fft_mf = np.fft.rfft((mf5 - mf5.mean()).values)
out_fft_mf2 = np.fft.rfft((mf11 - mf11.mean()).values)
plt.figure(figsize=(18, 15))
corte = 350 # cutoff (s)
plt.subplot(3, 2, 1)
plt.plot(np.abs(out_fft[:corte + 1]), lw=1, alpha=.7)
plt.subplot(3, 2, 2)
plt.plot(np.abs(out_fft[corte:N//2]), lw=1, alpha=.7)
plt.subplot(3, 2, 3)
plt.plot(np.abs(out_fft_mf[:corte + 1]), lw=1, alpha=.7)
plt.subplot(3, 2, 4)
plt.plot(np.abs(out_fft_mf[corte:N//2]), lw=1, alpha=.7)
plt.subplot(3, 2, 5)
plt.plot(np.abs(out_fft_mf2[:corte + 1]), lw=1, alpha=.7)
plt.subplot(3, 2, 6)
plt.plot(np.abs(out_fft_mf2[corte:N//2]), lw=1, alpha=.7);
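In [ ]:
# Sketch relating the bin indices plotted above to physical frequencies, assuming the 1 Hz sampling
# of power_standby: bin k of the rfft corresponds to k / N Hz, i.e. a period of N / k seconds.
freqs_hz = np.fft.rfftfreq(N, d=1.0)
print(freqs_hz[1:4])                 # lowest non-zero frequencies, in Hz
print(1 / freqs_hz[corte])           # period (s) corresponding to the 'corte' bin used above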
In [62]:
plt.plot(np.abs(out_fft_mf2[:60]), lw=1, alpha=.7);
In [ ]:
In [312]:
@timeit('process_power', verbose=True)
def process_power(df_homog_power,
kernel_size=15, roll_window_event=9, threshold_event=10,
margin_td=pd.Timedelta('120s'),
margin_ant=pd.Timedelta('3s')):
"""
Tomando una pd.DataFrame de 'POWER' homogéneo (con resample mean a 1s),
* aplica filtrado 'Wiener' a la señal raw,
* calcula ∆_wiener, deltas de power entre samples
* calcula ∆'_wiener con roll_window_event centrado, detectando máximos y mínimos locales
en los cambios de ∆_wiener, tomándolos como eventos de cambio.
* Agrupa los eventos de cambio en subconjuntos separados, al menos, un pd.Timedelta 'margin_td'.
(forma el evento desde inicio - pd.Timedelta 'margin_ant')
return:
df_power, list_index_events, df_feats_grupos
"""
def _is_event(x):
mid = len(x) // 2
xmin = np.min(x)
xmax = np.max(x)
if ((xmax - xmin) > threshold_event):
if (x[mid] == xmin):
return -1
elif (x[mid] == xmax):
return 1
return 0
df_power = df_homog_power.copy()
df_power['wiener'] = signal.wiener(df_power.power, kernel_size).astype('float32')
df_power['medfilt'] = signal.medfilt(df_power.power, kernel_size).astype('float32')
df_power['roll_std'] = df_power.wiener.rolling(11).std().astype('float32')
df_power['deltas'] = df_power.wiener.diff().fillna(0)
df_power['eventos'] = df_power['deltas'].diff().fillna(0).rolling(roll_window_event, center=True
).apply(_is_event).fillna(0).astype('int16')
list_index_events = []
last_event_init = None
events_in_interval = []
for t in df_power[df_power['eventos'] != 0].index:
if last_event_init is None:
events_in_interval.append(t)
elif t > last_event_init + margin_td:
list_index_events.append(events_in_interval)
events_in_interval = [t]
else:
events_in_interval.append(t)
last_event_init = t
print_info('Number of event groups: {} (out of {} total change events)'
.format(len(list_index_events), len(df_power[df_power['eventos'] != 0])))
sub_events = []
cols = ['power', 'medfilt', 'wiener', 'deltas', 'eventos']
cols_feat_grupos = ['index_ge', 't0', 'duracion', 'wiener_min', 'wiener_mean', 'wiener_max', 'wiener_std', 'wiener_median',
'eventos_sum', 'eventos_abs_sum', 'deltas_max', 'deltas_min', 'deltas_sum', 'deltas_abs_sum']
for i, ts in enumerate(list_index_events):
t0, tf = ts[0] - margin_ant, ts[-1] + margin_td
d_i = df_power.loc[t0:tf, cols]
resumen_i = (i, d_i.index[0], len(d_i),
d_i.wiener.min(), d_i.wiener.mean(), d_i.wiener.max(), d_i.wiener.std(), d_i.wiener.median(),
d_i.eventos.sum(), d_i.eventos.abs().sum(),
d_i.deltas.max(), d_i.deltas.min(), d_i.deltas.sum(), d_i.deltas.abs().sum())
sub_events.append(resumen_i)
df_feats_grupos = pd.DataFrame(sub_events, columns=cols_feat_grupos).set_index(cols_feat_grupos[0])
return df_power, list_index_events, df_feats_grupos
df_power, list_index_events, df_feats_grupos = process_power(power_standby)
df_feats_grupos.describe()
Out[312]:
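In [ ]:
# Small check on the grouping made by process_power (assumes the objects returned in the cell above):
# pull out the first detected event group and summarize the filtered signal inside it.
ts0 = list_index_events[0]
print('First group: {} change events between {} and {}'.format(len(ts0), ts0[0], ts0[-1]))
df_power.loc[ts0[0]:ts0[-1], ['power', 'wiener', 'deltas', 'eventos']].describe()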
In [510]:
# Frequency analysis of events / comparison of FFTs
# Split events by duration: KMeans++ with k clusters
from sklearn.cluster import KMeans
from scipy.signal import blackman
k = 9
km_dur_event = KMeans(n_clusters=k, n_jobs=-1).fit(df_feats_grupos_all_2.duracion.values.reshape(-1, 1))
print_info(sorted(km_dur_event.cluster_centers_))
amplitudes = []
grupo_durac = []
for i, ts in enumerate(lista_grupos_eventos_total_2):
t0, tf = ts[0] - margin_ant, ts[-1] + margin_td
d_i = POWER_FILT_2.loc[t0:tf, cols]
if len(d_i) > 5:
w = blackman(len(d_i))
amplitudes_i = np.sqrt(np.abs(scipy.fftpack.rfft(d_i.power * w)))[:(len(d_i) // 2)]
#amplitudes_i = np.sqrt(np.abs(np.fft.rfft(d_i.power)))[:(len(d_i) // 2)]
amplitudes.append(amplitudes_i)
grupo_durac.append(km_dur_event.labels_[i])
colors = tableau20[::2][:len(km_dur_event.cluster_centers_)]
plt.figure(figsize=(18, 12))
for a, g in zip(amplitudes, grupo_durac):
color = colors[g]
if len(a) > 200:
params = dict(lw=.75, alpha=.3, color=color)
else:
params = dict(lw=.1, alpha=.01, color=color)
plt.plot(a[2:], **params)
plt.xlim(0, 600);
In [511]:
f, axes = plt.subplots(1, 2, figsize=(20, 12))
ymax, ne = 0, 0
for a, g in zip(amplitudes, grupo_durac):
color = colors[g]
if len(a) > 300:
params = dict(lw=.75, alpha=.35, color=color)
axes[0].plot(a[2:], **params)
ne += 1
ymax = max(ymax, max(a[2:]))
axes[0].annotate('{} events (with ∆T > {} s)'.format(ne, 300), (2000, ymax * .75), ha='left')
ymax, ne = 0, 0
for a, g in zip(amplitudes, grupo_durac):
color = colors[g]
if len(a) <= 300:
if len(a) <= 100:
params = dict(lw=.25, alpha=.03, color=color)
else:
params = dict(lw=.5, alpha=.1, color=color)
axes[1].plot(a[2:], **params)
ne += 1
ymax = max(ymax, max(a[2:]))
axes[1].annotate('{} events (with ∆T ≤ {} s)'.format(ne, 300), (200, ymax * .75), ha='left')
axes[1].set_xlim(0, 300);
In [487]:
big_events = df_feats_grupos_all_2[df_feats_grupos_all_2.duracion > 554]
print_info('There are {} long-duration events (mean ∆T = {:.1f} s)\n'.format(len(big_events), big_events.duracion.mean()))
plot_mosaico_eventos(POWER_FILT_2, big_events, n_rows=7, n_cols=5, size=4)
In [490]:
amplitudes = []
for t0, dur in zip(big_events.t0, big_events.duracion):
tf = t0 + pd.Timedelta(seconds=dur) + margin_td
t0 -= margin_ant
d_i = POWER_FILT_2.loc[t0:tf, cols]
if len(d_i) > 5:
amplitudes_i = np.sqrt(np.abs(np.fft.rfft(d_i.power)))[:(len(d_i) // 2)]
amplitudes.append(amplitudes_i)
plt.figure(figsize=(18, 12))
for a in amplitudes:
if len(a) > 1800:
params = dict(lw=1, alpha=.5, color='b')
else:
params = dict(lw=1, alpha=.3, color='r')
plt.plot(a[2:], **params)
#plt.xlim(0, 600);
In [509]:
from scipy.signal import blackman
from scipy.signal import hanning
x_fft = POWER_FILT_2.iloc[1000000:1001000].power
w = blackman(len(x_fft))
print_red(x_fft.head(10))
plt.plot(x_fft)
plt.show()
plt.plot(np.sqrt(np.abs(scipy.fftpack.fft(x_fft * w)))[1:len(x_fft) // 2])
Out[509]:
In [495]:
#window = np.hanning(51)
window = np.kaiser(12, 14)
plt.plot(window)
#plt.title("Hann window")
plt.title("Kaiser window")
plt.ylabel("Amplitude")
plt.xlabel("Sample")
plt.show()
plt.figure()
A = np.fft.fft(window, 2048) / 25.5
mag = np.abs(np.fft.fftshift(A))
freq = np.linspace(-0.5, 0.5, len(A))
response = 20 * np.log10(mag)
response = np.clip(response, -100, 100)
plt.plot(freq, response)
plt.title("Frequency response of the Hann window")
plt.ylabel("Magnitude [dB]")
plt.xlabel("Normalized frequency [cycles per sample]")
plt.axis('tight')
plt.show()
In [ ]:
In [237]:
from itertools import cycle
cm = cycle(tableau20[2::2])
ax = df_power.wiener.plot(figsize=(18, 8), lw=1, color=tableau20[0], alpha=.4)
for tt in list_index_events:
ax = df_power.wiener.loc[tt[0]:tt[-1]].plot(ax=ax, lw=1, color=next(cm), alpha=.8)
In [539]:
import matplotlib.patches as mpp
@timeit('plot_power_events', verbose=True)
def plot_power_events(df_power, df_feats_grupos):
margin_y = 50
frac_day = 1. / (24*60*60.)
df_feats_p = df_feats_grupos.copy()
df_feats_p['tf'] = df_feats_p['t0'] + df_feats_p.duracion.apply(lambda x: pd.Timedelta(seconds=x))
f, ax = plt.subplots(1, 1, figsize=(18, 8))
df_power.wiener.plot(ax=ax, lw=1, color=tableau20[4], alpha=.7)
for i, row in df_feats_p.iterrows():
r = mpp.Rectangle((mpd.date2num(row.t0) - 15 * frac_day, row.wiener_min - margin_y),
row.duracion * frac_day + 15 * frac_day, row.wiener_max - row.wiener_min + 2 * margin_y,
fill=True, lw=0, alpha=.6, fc=tableau20[6])
ax.add_patch(r)
ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M'))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
ax.xaxis.set_tick_params(labelsize=10)
ax.xaxis.set_tick_params(pad=3)
ax.set_xlabel('')
def _plot_row_evento(df_i, ax=None, row=None, with_raw=True):
margin_y = 50
frac_day = 1. / (24*60*60.)
if ax is None:
_, ax = plt.subplots(1, 1)
ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M'))
if with_raw:
df_i['power'].plot(ax=ax, lw=.5, color=tableau20[0], alpha=.4)
df_i['wiener'].plot(ax=ax, lw=1, color=tableau20[4], alpha=.8)
if row is not None:
r = mpp.Rectangle((mpd.date2num(row.t0) - 15 * frac_day, row.wiener_min - margin_y),
row.duracion * frac_day + 15 * frac_day, row.wiener_max - row.wiener_min + 2 * margin_y,
fill=True, lw=0, alpha=.6, fc=tableau20[6])
ax.add_patch(r)
ax.annotate('E_{}'.format(row.name), (.07, .9), xycoords='axes fraction')
plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
ax.xaxis.set_tick_params(labelsize=10)
ax.xaxis.set_tick_params(pad=3)
ax.set_xlabel('')
return ax
@timeit('plot_mosaico_eventos', verbose=True)
def plot_mosaico_eventos(df_power, df_feats_grupos, n_rows=3, n_cols=3, size=5):
margin_event = pd.Timedelta('5min')
df_feats_p = df_feats_grupos.copy()
df_feats_p['tf'] = df_feats_p['t0'] + df_feats_p.duracion.apply(lambda x: pd.Timedelta(seconds=x))
f, axes = plt.subplots(n_rows, n_cols, figsize=(size * n_cols, size * n_rows))
axes = axes.ravel()
for i, (i_r, row) in enumerate(df_feats_p.iterrows()):
if i >= len(axes):
break
df_i = df_power.loc[row.t0 - margin_event: row.tf + margin_event]
ax = _plot_row_evento(df_i, axes[i], row, with_raw=True)
In [540]:
df_power, list_index_events, df_feats_grupos = process_power(homog_power.loc['2016-08-29'])
plot_power_events(df_power, df_feats_grupos)
plt.show()
plot_mosaico_eventos(df_power, df_feats_grupos, n_rows=3, n_cols=3, size=4)
plt.show()
plot_mosaico_eventos(df_power, df_feats_grupos.iloc[9:], n_rows=3, n_cols=3, size=4)
In [300]:
df_power, list_index_events, df_feats_grupos = process_power(homog_power.loc['2016-08-13'])
plot_power_events(df_power, df_feats_grupos)
plt.show()
plot_mosaico_eventos(df_power, df_feats_grupos, n_rows=5, n_cols=4, size=4)
In [518]:
ax.annotate?
In [313]:
process_params = dict(kernel_size=15, roll_window_event=9, threshold_event=10,
margin_td=pd.Timedelta('120s'), margin_ant=pd.Timedelta('3s'))
POWER_FILT, lista_grupos_eventos_total, df_feats_grupos_all = process_power(homog_power, **process_params)
POWER_FILT.info()
df_feats_grupos_all.columns
Out[313]:
In [304]:
df_feats_grupos_all.duracion.hist(bins=500, lw=0, log=True)
Out[304]:
In [522]:
plot_mosaico_eventos(POWER_FILT, df_feats_grupos_all[df_feats_grupos_all.duracion > 300],
n_rows=3, n_cols=3, size=4)
In [321]:
process_params_2 = dict(kernel_size=11, roll_window_event=7, threshold_event=7,
margin_td=pd.Timedelta('30s'), margin_ant=pd.Timedelta('2s'))
POWER_FILT_2, lista_grupos_eventos_total_2, df_feats_grupos_all_2 = process_power(homog_power, **process_params_2)
df_feats_grupos_all_2.duracion.hist(bins=500, lw=0, log=True)
Out[321]:
In [527]:
plot_mosaico_eventos(POWER_FILT_2, df_feats_grupos_all_2[df_feats_grupos_all_2.duracion > 550],
n_rows=7, n_cols=5, size=4)
plt.show()
df_feats_grupos_all_2[df_feats_grupos_all_2.duracion > 1000].head()
Out[527]:
In [543]:
def _get_evento(id_ev, df_feats_grupos, df_power, verbose=False):
df_ev_f = df_feats_grupos.loc[id_ev]
#df_ev = df_power[df_power.intervalo == id_ev + 1]
df_ev = df_power.loc[df_ev_f.t0:df_ev_f.t0 + pd.Timedelta(seconds=df_ev_f.duracion)]
if verbose:
print_info('* Event {}. Features:\n{}\nHead:\n{}\nTail:\n{}'
.format(id_ev, pd.DataFrame(df_ev_f).T, df_ev.head(3), df_ev.tail(3)))
return df_ev
_plot_row_evento(_get_evento(3512, df_feats_grupos_all_2, POWER_FILT_2, verbose=True), row=df_feats_grupos_all_2.loc[3512]);
In [613]:
grupo_1 = [167, 168, 1692, 1693, 2354, 2897, 3512, 3082] # 3082 does not belong to this group
f, axes = plt.subplots(4, 2, figsize=(8, 16))
f2, axes_2 = plt.subplots(4, 2, figsize=(8, 16))
for id_ev, ax, ax_2 in zip(grupo_1, axes.ravel(), axes_2.ravel()):
df_i = _get_evento(id_ev, df_feats_grupos_all_2, POWER_FILT_2)
fft_mag_i = np.sqrt(np.abs(scipy.fftpack.rfft(df_i.power)[:len(df_i) // 2]))
abs_freqs = np.array(list(sorted(zip(fft_mag_i[1:], range(1, fft_mag_i.shape[0])), reverse=True)))
#abs_freqs.sort(axis=0)
#print(np.argmax(fft_mag_i[1:]), fft_mag_i[np.argmax(fft_mag_i[1:]) + 1])
ax.plot(fft_mag_i, lw=1, color=tableau20[0])
max_abs, idx_max_abs = np.max(fft_mag_i), np.argmax(fft_mag_i)
for a, f in abs_freqs[:3,:]:
ax.annotate('f={}\n{:.2f}'.format(int(f), a / max_abs), (f, a),
xytext=(60, 10), textcoords='offset points', ha='right', fontsize=8, color=tableau20[1],
arrowprops=dict(arrowstyle="->",
connectionstyle="arc3,rad=0.3"))
ax.annotate('Amax={:.1f}, at f={:.0f}.\n* Best 5 freqs={}'.format(max_abs, idx_max_abs, abs_freqs[:5,1]), (.04, .85),
xycoords='axes fraction', ha='left', fontsize=12, color=tableau20[2])
_plot_row_evento(df_i, ax=ax_2)
In [ ]:
In [1128]:
#autocorrelation_plot(df_i.power - df_i.power.mean())
sns.set_style('ticks')
def _gen_ts_sinusoidal(Ts=1, tf=3600,
freqs=[1 / 300, 1 / 225, 1 / 60, 1/20],
amps=[3, 7, 2, .5],
offsets=[3, 7, -5, .15],
mean=30):
N = tf // Ts
tt = pd.date_range(start=pd.Timestamp.now(tz=TZ).replace(microsecond=0), periods=N,
freq=pd.Timedelta(seconds=Ts))
t = np.linspace(0, tf, N)
x = mean #+ np.random.random(N) * 0.3
for a, f, o in zip(amps, freqs, offsets):
x += a * np.cos(2*np.pi*f*t + o)
return tt, x
def _plot_tt_fft(tt, x, df_fft, best_fft=None, log_mag=False, figsize=(8, 8)):
# PLOT tt + fft
fig, axes = plt.subplots(2, 1, figsize=figsize,
gridspec_kw=dict(hspace=0, wspace=0, height_ratios=[.45, .55]))
ax = axes[0]
ax.xaxis.tick_top()
ax.plot(tt, x, color=tableau20[4], lw=1, alpha=.8)
ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M:%S'))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
ax.xaxis.set_tick_params(labelsize=8, pad=2)
# ax.set_xlabel('Time', labelpad=1)
ax.set_ylabel('(Power) time series', labelpad=3)
ax.grid('on', axis='x')
ax.set_yticks(ax.get_yticks()[1:])
# FFT plot
ax = axes[1]
ax.xaxis.tick_bottom()
df_fft.mag.plot(ax=ax, color=tableau20[0], lw=1.25, alpha=.8, label='(Mag)')
if best_fft is not None:
for i, (f, row) in enumerate(best_fft.iterrows()):
fr = row.frac_mag
disp = 1 - fr
ax.plot([f], [row.mag], 'o', markersize=10, markeredgewidth=1.5, markeredgecolor=tableau20[2], markerfacecolor=[0, 0, 0, 0])
ax.annotate('f=1/{:.1f}, A={:.2g}'.format(row.cycles_f, row.frac_mag), (f, row.mag),
#xytext=(.75 + i * .05, row.frac_mag * .9), textcoords='axes fraction',
xytext=(.97, .8 - i * .075), textcoords='axes fraction',
ha='right', va='bottom', fontsize=8, color=tableau20[2],
arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=-0.05"))
z_0 = 2 * np.abs(np.mean(x))  # DC term of the FFT (z_pure[0] in _feat_fft), recomputed locally
ax.annotate('A(f0)={:.2f}'.format(np.sqrt(z_0) / best_fft.mag.iloc[0]), (.97, .92), xycoords='axes fraction',
ha='right', fontsize=10, color=tableau20[14])
ax.set_xlabel('Freq (Hz)', labelpad=1)
#ax.set_xlim((0, max(.1, best_fft.index.max())))
ax.set_xlim((0, min(.1, best_fft.index.max() * 10)))
if log_mag:
ax.set_ylabel('Mag (20·log10|mag|)', labelpad=3)
ax.set_ylim((-60, ax.get_ylim()[1]))
else:
ax.set_ylabel('Mag (√|mag|)', labelpad=3)
#ax.set_xlim((2/N, (1/Ts)/2))
#ax.set_xlim((2/N, .5))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
ax.xaxis.set_tick_params(labelsize=9, pad=3)
ax.set_yticks(ax.get_yticks()[:-1])
#plt.show()
return axes
@timeit('CALC FFT FEATURES', verbose=True)
def _feat_fft(tt, x, return_best=5, log_mag=False, plot=False, figsize=(8, 8), verbose=False):
# Scipy Real FFT
z_pure = 2 * scipy.fftpack.rfft(x) / len(x) # FFT
y = scipy.fftpack.rfftfreq(len(x), d=(tt[1] - tt[0]).total_seconds()) # Frequency data
#idx = np.argsort(y)
#idx_mag = np.argsort(z)[::-1]
# With pandas
z_0 = z_pure[0]
df = pd.DataFrame([z_pure[1:]**2, y[1:]], index=['mag', 'freq']).T.abs()
if log_mag:
df_fft = df.groupby('freq').sum().apply(lambda x: 20*np.log10(x))
df_fft['frac_mag'] = (df_fft['mag'] / df_fft['mag'].max()).apply(lambda x: np.exp(x/20))
else:
df_fft = df.groupby('freq').sum().apply(np.sqrt)
df_fft['frac_mag'] = df_fft['mag'] / df_fft['mag'].max()
df_fft['cycles_f'] = [1/x for x in df_fft.index]
if verbose:
print(len(z_pure), sum(z_pure), sum(z_pure**2))
print_red(df_fft.sort_values(by='mag', ascending=False).head())
if log_mag:
best_fft = df_fft.sort_values(by='mag', ascending=False).head(return_best)
else:
best_fft = df_fft[df_fft.frac_mag > .05].sort_values(by='mag', ascending=False).head(return_best)
if plot:
axes = _plot_tt_fft(tt, x, df_fft, best_fft, log_mag=log_mag, figsize=figsize)
plt.show()
return best_fft
#_feat_fft(*_gen_ts_sinusoidal(amps=[3, 7, 2, .5]), plot=True, figsize=(4, 4));
_feat_fft(*_gen_ts_sinusoidal(amps=[3, 7, 2, .5], tf=36000), plot=True, figsize=(4, 4));
_feat_fft(*_gen_ts_sinusoidal(amps=[3, 7, 2, .5], tf=1800), plot=True, figsize=(4, 4));
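In [ ]:
# Sketch validating _feat_fft on the synthetic signal (assumes the helpers defined above): the
# recovered peak frequencies should be close to the frequencies used to build the series.
tt, x = _gen_ts_sinusoidal(amps=[3, 7, 2, .5], tf=3600)
best = _feat_fft(tt, x, plot=False)
print(sorted(best.index))                          # recovered peak frequencies (Hz)
print(sorted([1 / 300, 1 / 225, 1 / 60, 1 / 20]))  # frequencies fed into the generator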
In [1130]:
power_standby
#df1 = _feat_fft(power_standby.index, power_standby.power, plot=True, log_mag=True, figsize=(10, 7))
df1 = _feat_fft(power_standby.index, power_standby.power, plot=True, log_mag=False, figsize=(10, 7))
df2 = _feat_fft(power_standby.index, power_standby.wiener, plot=True, log_mag=False, figsize=(10, 7))
In [ ]:
In [1319]:
@timeit('slice_window_fft', verbose=True)
def _slice_window_fft(data, size_w, frac_solape=.5, only_complete=True):
def _feat_fft_fast(tt, x, with_freq=True):
# Scipy Real FFT
z_pure = scipy.fftpack.rfft(x) / len(x)
mags = np.sqrt(z_pure[1:-1:2]**2 + z_pure[2:-1:2]**2)
if with_freq:
y = scipy.fftpack.rfftfreq(len(x), d=(tt[1] - tt[0]).total_seconds()) # Frequency data
freqs = y[1:-1:2]
return freqs, mags
return mags
N = len(data)
start = data.index[0]
end = data.index[-1]
T = end - start
Ts = data.index[1] - start
delta = (1 - frac_solape) * size_w * Ts
n = (T - size_w * Ts) / delta + 1
results, freqs, init = [], None, False
while start < end:
data_w = data.loc[start:start + size_w * Ts - Ts]
#print(data_w.tail(2))
if only_complete and (data_w.shape[0] == size_w):
if not init:
freqs, mags = _feat_fft_fast(data_w.index, data_w.power, with_freq=True)
init = True
else:
mags = _feat_fft_fast(data_w.index, data_w.power, with_freq=False)
results.append((start, mags))
#else:
# break
start += delta
#return pd.DataFrame(results)
return freqs, results
# %timeit _slice_window_fft(power_standby, 14400, frac_solape=.75)
# 100 loops, best of 3: 10.4 ms per loop
def fft_window_power(power, delta='3h', step='15min'):
window_delta = pd.Timedelta(delta)
window_step = pd.Timedelta(step)
fr_s = 1 - window_step / window_delta
window_size = window_delta / power.index.freq
print_cyan('Window size: {:.0f}, step: {}, (overlap={:.3f})'.format(window_size, window_step, fr_s))
freqs, results = _slice_window_fft(power, window_size, fr_s)
return pd.DataFrame([pd.Series(data=r[1], index=freqs, name=r[0]) for r in results]).T #freqs, results
df_results = fft_window_power(power_standby, delta='3h', step='10min')
df_results_2 = fft_window_power(power_day14, delta='3h', step='10min')
df_results_3 = fft_window_power(power_day13, delta='3h', step='10min')
#fft_slice =
#fft_slice
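In [ ]:
# Orientation check for the sliding-window FFT output (assumes df_results from above):
# rows are FFT frequencies in Hz, columns are the start timestamps of each 3 h window.
print(df_results.shape)
df_results.iloc[:3, :3]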
In [1321]:
# FFT POWER
results_FFT = fft_window_power(POWER_FILT_2, delta='10min', step='5min')
print_ok(results_FFT.shape)
#results_FFT
In [1292]:
plt.figure(figsize=(16, 9))
ax = plt.subplot(121)
results_FFT.loc[:.006].plot(ax=ax, lw=.5, color='k', alpha=.01)
plt.legend([])
ax = plt.subplot(122)
results_FFT.loc[.006:].plot(ax=ax, lw=.5, color='k', alpha=.1)
plt.legend([])
Out[1292]:
In [1296]:
results_FFT.loc[:.006].shape, results_FFT.loc[.006:].shape, results_FFT.loc[:.2].shape
#(results_FFT.loc[:.006] - results_FFT.loc[:.006].rolling(10).mean()) #.plot()
Out[1296]:
In [1329]:
from sklearn.cluster import KMeans, DBSCAN, AffinityPropagation, AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
X = results_FFT.T.values
X_std = StandardScaler().fit_transform(X)
m_pca_1 = PCA(n_components=2, copy=True, whiten=True)
X_pca = m_pca_1.fit_transform(X)
m_pca_1
Out[1329]:
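In [ ]:
# Sketch (assumes m_pca_1 and X_pca from the cell above): check how much variance the
# 2-component projection keeps before reading too much into clustering plots built on it.
print(m_pca_1.explained_variance_ratio_)
print('Variance kept by 2 components: {:.1%}'.format(m_pca_1.explained_variance_ratio_.sum()))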
In [1332]:
len(X), len(X_pca), len(model_hd.labels_)
Out[1332]:
In [ ]:
m_pca_20 = PCA(n_components=20, copy=True, whiten=True)
X_pca20 = m_pca_20.fit_transform(X_std)
print(X_pca20.shape)
m_tsne_20 = TSNE(n_components=20, method='exact')  # barnes_hut only supports n_components < 4
X_tsne20 = m_tsne_20.fit_transform(X_std)
print(X_tsne20.shape)
In [1331]:
k = 11
#labels_pca, n_clusters_pca, model_pca, X_pca_2c = pca_clustering(X, k, whiten=True)
plot_clustering_figure(X, model_hd.labels_, plot_clustering_as_voronoi, X_pca, model_pca)
In [1287]:
ax = plt.subplot(121)
df_results_2.loc[:.006].plot(ax=ax, lw=.75, color='k', alpha=.1)
plt.legend([])
ax = plt.subplot(122)
df_results_2.plot(ax=ax, lw=.5, color='k', alpha=.005)
plt.legend([])
Out[1287]:
In [1270]:
#pd.DataFrame([pd.Series(data=r[1], index=[1/x for x in freqs], name=r[0]) for r in results]).T.plot(lw=.75)
#pd.DataFrame([pd.Series(data=r[1], index=freqs, name=r[0].time()) for r in results]).T.loc[:.06].plot(lw=.75, alpha=.3, legend='off')
ax = plt.subplot(121)
df_results.loc[:.006].plot(ax=ax, lw=.75, color='k', alpha=.01)
plt.legend([])
ax = plt.subplot(122)
df_results.plot(ax=ax, lw=.5, color='k', alpha=.005)
plt.legend([])
Out[1270]:
In [ ]:
In [1132]:
ax = plt.subplot(111)
labels_c = ['c{:02d}'.format(i + 1) for i in range(7)]
labels_mag = ['mag{:02d}'.format(i + 1) for i in range(7)]
for id_w in fft_slice.index:
ax.plot(fft_slice.loc[id_w].loc[labels_c], fft_slice.loc[id_w].loc[labels_mag], lw=1)
In [1133]:
pca_clustering(fft_slice.drop('start', axis=1), 3)
Out[1133]:
In [1136]:
from hdbscan import HDBSCAN
X_features = fft_slice.drop('start', axis=1).values
k = 5
labels_pca, n_clusters_pca, model_pca, X_pca_2c = pca_clustering(X_features, k, whiten=True)
plot_clustering_figure(X_features, labels_pca, plot_clustering_as_voronoi, X_pca_2c, model_pca)
model_hd = HDBSCAN().fit(X_features)
plot_clustering_figure(X_pca_2c, model_hd.labels_, plot_clustering)
In [919]:
@timeit('local_maxima_minima', verbose=True)
def _local_max(serie, roll_w=3, verbose=False):
def _is_local_max(x):
#if np.max(x) == x[len(x) // 2]:
if np.argmax(x) == len(x) // 2:
return True
return False
if verbose:
print_secc('LOCAL MAX of {} values. Centered window of {} samples'
.format(len(serie), roll_w))
maximos = serie.rolling(roll_w, center=True).apply(_is_local_max).fillna(0).astype(bool)
if verbose:
print_info(serie[maximos].head())
return maximos
#cycles = [1/x for x in y]
#plt.scatter(cycles, z)
#plt.xlim(0, 310)
#cycles
#z[reversed(idx_mag)]
#np.argsort?
s = df_fft.mag
print_red(s.max())
maximos = _local_max(s, 7)
df_fft[maximos][s[maximos] / s.max() > .07]
Out[919]:
In [612]:
#abs_freqs = np.array(list(sorted(zip(fft_mag_i[1:], range(1, fft_mag_i.shape[0])), reverse=True)))
#abs_freqs.sort(axis=0)
#ax = _plot_row_evento(df_i)
ax = plt.subplot()
ax.plot(fft_mag_i, lw=1, color=tableau20[0])
max_abs, idx_max_abs = np.max(fft_mag_i), np.argmax(fft_mag_i)
for a, f in abs_freqs[:3,:]:
ax.annotate('f={}\n{:.2f}'.format(int(f), a / max_abs), (f, a),
xytext=(60, 10), textcoords='offset points', ha='right', fontsize=8, color=tableau20[1],
arrowprops=dict(arrowstyle="->",
connectionstyle="arc3,rad=0.3"))
ax.annotate('Amax={:.1f}, at f={:.0f}.\n* Best 5 freqs={}'.format(max_abs, idx_max_abs, abs_freqs[:5,1]), (.04, .85),
xycoords='axes fraction', ha='left', fontsize=12, color=tableau20[2])
Out[612]:
In [369]:
#df_feats_grupos_all_2[df_feats_grupos_all_2.duracion > 1000]
import sklearn.cluster as cluster
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from hdbscan import HDBSCAN
cols_cluster = ['deltas_sum', 'wiener_median', 'wiener_std', 'duracion', 'wiener_min', 'wiener_mean', 'wiener_max',
'eventos_sum', 'eventos_abs_sum', 'deltas_max', 'deltas_min', 'deltas_abs_sum']
df_X = df_feats_grupos_all_2[cols_cluster]
X = df_X.values
X_std = StandardScaler().fit_transform(X)
In [366]:
from enerpi.process_ldr import pca_clustering, plot_clustering, plot_clustering_as_voronoi, plot_clustering_figure
k = 36
labels_pca, n_clusters_pca, model_pca, X_pca = pca_clustering(X, k, whiten=True, labels_true=None)
print(n_clusters_pca)
plot_clustering_figure(X, labels_pca, plot_clustering_as_voronoi, X_pca, model_pca)
In [375]:
model_hd = HDBSCAN(min_samples=3)
model_hd.fit(X_std)
vc = pd.Series(model_hd.labels_).value_counts()
print(len(vc) - 1, 'clusters')
print_info(vc.head())
plot_clustering_figure(X_std, model_hd.labels_, plot_clustering, model_hd)
In [377]:
df_feats_grupos_all_2
Out[377]:
In [380]:
POWER_FILT_2['intervalo'] = 0
POWER_FILT_2.loc[pd.DatetimeIndex(df_feats_grupos_all_2.t0), 'intervalo'] = 1
POWER_FILT_2['intervalo'] = POWER_FILT_2['intervalo'].cumsum()
POWER_FILT_2
Out[380]:
In [385]:
gb_int = POWER_FILT_2.groupby('intervalo')
gb_int.wiener.count().hist(log=True, bins=500, lw=0)
Out[385]:
In [387]:
duracion_int = gb_int.wiener.count()
duracion_int[duracion_int > 10000]
Out[387]:
In [396]:
POWER_FILT_2.iloc[:100000].wiener.rolling(10).std().plot()
Out[396]:
In [1]:
#POWER_FILT_2[POWER_FILT_2.intervalo == 2297]
In [266]:
bootstrap_plot(df_power.wiener, samples=100)
Out[266]:
In [250]:
#ax.add_patch?
mpp.Rectangle?
In [ ]:
In [72]:
b, a = signal.butter(4, 100, 'low', analog=True)
w, h = signal.freqs(b, a)
plt.semilogx(w, 20 * np.log10(abs(h)))
plt.title('Butterworth filter frequency response')
plt.xlabel('Frequency [radians / second]')
plt.ylabel('Amplitude [dB]')
plt.margins(0, 0.1)
plt.grid(which='both', axis='both')
plt.axvline(100, color='green') # cutoff frequency
plt.show()
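In [ ]:
# The cell above only shows the analog frequency response; a hedged sketch of actually applying
# a digital Butterworth low-pass to the 1 Hz standby signal (assumes power_standby from above).
b_d, a_d = signal.butter(4, 0.05, 'low')    # cutoff at 0.05 x Nyquist (0.5 Hz), i.e. ~0.025 Hz
smooth = signal.filtfilt(b_d, a_d, power_standby.power.values)
plt.plot(power_standby.power.values[4600:4800], lw=1, alpha=.5)
plt.plot(smooth[4600:4800], lw=2)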
In [73]:
signal.butter?
In [106]:
plt.figure(figsize=(18, 5))
t0, tf = 4600, 4800
plt.plot(power_standby.power.iloc[t0:tf].values, lw=2, alpha=.7, color='blue')
#plt.plot(signal.medfilt(power_standby.power, 3)[t0:tf], lw=1, alpha=.7, color='orange')
plt.plot(signal.wiener(power_standby.power, 15)[t0:tf], lw=2, alpha=.7, color='red')
Out[106]:
In [107]:
power_standby['medfilt'] = signal.medfilt(power_standby.power, 15)
power_standby['wiener'] = signal.wiener(power_standby.power, 15)
power_standby.plot(lw=1, alpha=.7)
Out[107]:
In [108]:
power_standby.iloc[t0:tf].plot(lw=1, alpha=.7)
Out[108]:
In [109]:
power_standby.wiener.plot(lw=1, alpha=.7)
Out[109]:
In [114]:
power_standby.wiener.diff().fillna(0).abs().hist(bins=500, log=True)
Out[114]:
In [128]:
power_standby['event'] = False
power_standby.loc[power_standby.wiener.diff().fillna(0).abs() > 7, 'event'] = True
ax = power_standby.wiener.plot()
ax.scatter(power_standby[power_standby.event].index, power_standby[power_standby.event].wiener, color='red')
Out[128]:
In [136]:
deltas = power_standby.diff().fillna(0)
deltas.wiener.rolling(3).mean() #apply(lambda x: np.cumsum(x))
Out[136]:
In [146]:
cumsum_event = 0
s_cumsum = []
s_event = []
for v in deltas.wiener:
if abs(v) > 10 and (not s_event or not s_event[-1]):
cumsum_event = v
s_event.append(True)
else:
s_event.append(False)
cumsum_event += v
s_cumsum.append(cumsum_event)
pd.DataFrame([s_cumsum, s_event], index=['event_acum', 'is_event']).T.plot()
Out[146]:
In [169]:
THRESHOLD_EVENT = 10
def _is_event(x):
mid = len(x) // 2
xmin = np.min(x)
xmax = np.max(x)
if ((xmax - xmin) > THRESHOLD_EVENT):
if (x[mid] == xmin):
return -1
elif (x[mid] == xmax):
return 1
return 0
eventos = deltas.wiener.diff().fillna(0).rolling(9, center=True).apply(_is_event).fillna(0)
eventos.plot(lw=1)
plt.show()
eventos.cumsum().plot(lw=1)
Out[169]:
In [170]:
f, ax = plt.subplots(1, 1, figsize=(18, 6))
ax = power_standby.wiener.iloc[t0:tf].plot(lw=2, color=tableau20[4])
ax2 = plt.twinx(ax)
ax2 = eventos.iloc[t0:tf].plot(ax=ax2, lw=1, color=tableau20[6])
ax.set_ylim((190, 250))
Out[170]:
In [174]:
eventos.abs().resample('5min').sum().plot()
Out[174]:
In [187]:
tt_ev = eventos[eventos != 0].index
tt_ev = pd.Index(tt_ev[1:].values - tt_ev[:-1].values)
tt_ev
Out[187]:
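In [ ]:
# Sketch summarizing the gaps between consecutive detected events, in seconds (assumes tt_ev above).
inter_event_s = pd.Series(tt_ev).dt.total_seconds()
inter_event_s.describe()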
In [ ]:
In [205]:
deltas = power_standby.diff().fillna(0)
#pd.Index(tt_ev[1:].values - tt_ev[:-1].values)
#tt_ev
margin_td = pd.Timedelta('120s')
t0 = eventos.index[0]
list_index_events = []
last_event_init = None
events_in_interval = []
for t in eventos[eventos != 0].index:
if last_event_init is None:
last_event_init = t
events_in_interval.append(t)
elif t > last_event_init + margin_td:
list_index_events.append(events_in_interval)
events_in_interval = [t]
last_event_init = t
else:
events_in_interval.append(t)
last_event_init = t
len(list_index_events), list_index_events
sub_events = []
margin_ant = pd.Timedelta('5s')
cols = ['power', 'medfilt', 'wiener']
for ts in list_index_events:
print_red(ts)
t0, tf = ts[0] - margin_ant, ts[-1] + margin_td
d_i = deltas.loc[t0:tf, cols]
p_i = power_standby.loc[t0:tf, cols]
sec_i = eventos.loc[t0:tf]
print_cyan(d_i)
print_info(p_i)
print_yellow(sec_i)
break
In [ ]: